require("Hmisc")
require("plyr")
require("ggplot2")
require("car")
require("compositions")
require("MatchIt")
require("cem")
require("Zelig")
options(digits=10)
options(scipen=10)
#setwd("/nfs/projects/k/kyu")
setwd("U:/gov2001/paper")

##############################################################
## Matching (volatility)
##############################################################

rm(list=ls())
load("prematch_data_all.RData")
data <- data.prematch

# Set treatment groups
data$TREATV <- NA
data[data$VOLATQ==5,"TREATV"] <- 1
data[data$VOLATQ==1,"TREATV"] <- 0
data$TREATB <- NA
data[data$BETAQ==5,"TREATB"] <- 1
data[data$BETAQ==1,"TREATB"] <- 0

# Take log of cap and volume
data$LOGCAP <- log(data$CAP)
data$LOGVOL <- log(data$VOL)

# Remove rows with missing data
data.vol <- data[,c("TREATV","PERIOD","RETURN","CAP","LOGCAP","VOL","LOGVOL","GROUP","INDUSTRY")]
data.vol <- data.vol[complete.cases(data.vol),]
data.vol <- data.vol[data.vol$CAP>0,]
data.vol <- data.vol[data.vol$VOL>0,]
data.beta <- data[,c("TREATB","PERIOD","RETURN","CAP","LOGCAP","VOL","LOGVOL","GROUP","INDUSTRY")]
data.beta <- data.beta[complete.cases(data.beta),]
data.beta <- data.beta[data.beta$CAP>0,]
data.beta <- data.beta[data.beta$VOL>0,]

# Perform matching to each period in order to prune data
start <- 61
end <- 552
data.prunedv <- data.frame()
data.prunedb <- data.frame()
for(i in start:end) {
  print(i)
  # Volatility
  data.cur <- data.vol[data.vol$PERIOD==i,]
  # CEM
  l <- data.cur$LOGCAP
  logcap.cut <- c(0,quantile(l,seq(.25,.75,.25),na.rm=T))
  l <- data.cur$LOGVOL
  logvol.cut <- c(0,quantile(l,seq(.25,.75,.25),na.rm=T))
  m.out <- matchit(TREATV~LOGCAP+LOGVOL+factor(GROUP),data=data.cur,method="cem",cutpoints=list(LOGCAP=logcap.cut,LOGVOL=logvol.cut))
  # Nearest neighbor
  # m.out <- matchit(TREATV~LOGCAP+LOGVOL+factor(GROUP),data=data.cur,method="nearest")
  m.data <- match.data(m.out)
  data.prunedv <- rbind(data.prunedv,m.data)
  # Beta
  data.cur <- data.beta[data.beta$PERIOD==i,]
  # CEM
  l <- data.cur$LOGCAP
  logcap.cut <- c(0,quantile(l,seq(.25,.75,.25),na.rm=T))
  l <- data.cur$LOGVOL
  logvol.cut <- c(0,quantile(l,seq(.25,.75,.25),na.rm=T))
  m.out <- matchit(TREATB~LOGCAP+LOGVOL+factor(GROUP),data=data.cur,method="cem",cutpoints=list(LOGCAP=logcap.cut,LOGVOL=logvol.cut))
  # Nearest neighbor
  # m.out <- matchit(TREATB~LOGCAP+LOGVOL+factor(GROUP),data=data.cur,method="nearest")
  m.data <- match.data(m.out)
  data.prunedb <- rbind(data.prunedb,m.data)
}

# Post-match returns

# For CEM
data.prunedv$WEIGHT <- data.prunedv$CAP*data.prunedv$weights
data.prunedb$WEIGHT <- data.prunedb$CAP*data.prunedb$weights
ret.volat <- ddply(data.prunedv,.(PERIOD,TREATV),summarise,RETURN=weighted.mean(RETURN,WEIGHT))
ret.beta <- ddply(data.prunedb,.(PERIOD,TREATB),summarise,RETURN=weighted.mean(RETURN,WEIGHT))

# For nearest neighbor
# ret.volat <- ddply(data.prunedv,.(PERIOD,TREATV),summarise,RETURN=weighted.mean(RETURN,CAP))
# ret.beta <- ddply(data.prunedb,.(PERIOD,TREATB),summarise,RETURN=weighted.mean(RETURN,CAP))

# Cumulative returns
cum.volat0 <- cumprod(ret.volat[ret.volat$TREATV==0,"RETURN"]+1)
cum.volat1 <- cumprod(ret.volat[ret.volat$TREATV==1,"RETURN"]+1)
cum.beta0 <- cumprod(ret.beta[ret.beta$TREATB==0,"RETURN"]+1)
cum.beta1 <- cumprod(ret.beta[ret.beta$TREATB==1,"RETURN"]+1)
cum.volat0[492]
cum.volat1[492]
cum.beta0[492]
cum.beta1[492]

# Save to file (split into 2 files to conserve memory)
save(data.vol,data.beta,file="matching_results_cem1.RData")
rm(data.vol,data.beta)
save(data.prunedv,data.prunedb,ret.volat,ret.beta,cum.volat0,cum.volat1,cum.beta0,cum.beta1,file="matching_results_cem2.RData")

##############################################################
## FIGURE 1
##############################################################

rm(list=ls())
load("matching_results_cem1.RData")
load("matching_results_cem2.RData")
load("results_volat_all.RData")
load("results_beta_all.RData")

# Matched Volatility plot
d <- data.frame(YEAR=1963+(61:552)/12,LINE=1,CUMRET=cum.volat0)
d <- rbind(d,data.frame(YEAR=1963+(61:552)/12,LINE=2,CUMRET=cum.volat1))
d <- rbind(d,data.frame(YEAR=1963+(61:552)/12,LINE=3,CUMRET=volat[volat$VOLATQ==1,"CUMRET"]))
d <- rbind(d,data.frame(YEAR=1963+(61:552)/12,LINE=4,CUMRET=volat[volat$VOLATQ==5,"CUMRET"]))
d[d$LINE==1,"Volatility Quintiles"] <- paste("Matched Quintile 1 ($",format(d[d$LINE==1 & d$YEAR==2009,"CUMRET"],digits=2,nsmall=2),")",sep="")
d[d$LINE==2,"Volatility Quintiles"] <- paste("Matched Quintile 5 ($",format(d[d$LINE==2 & d$YEAR==2009,"CUMRET"],digits=2,nsmall=2),")",sep="")
d[d$LINE==3,"Volatility Quintiles"] <- paste("Original Quintile 1 ($",format(d[d$LINE==3 & d$YEAR==2009,"CUMRET"],digits=2,nsmall=2),")",sep="")
d[d$LINE==4,"Volatility Quintiles"] <- paste("Original Quintile 5 ($",format(d[d$LINE==4 & d$YEAR==2009,"CUMRET"],digits=2,nsmall=2),")",sep="")
g <- ggplot(data=d,aes(x=YEAR,y=CUMRET,colour=`Volatility Quintiles`,linetype=`Volatility Quintiles`)) + geom_line(lwd=1)
g <- g + scale_y_log10("Cumulative Return",limits = c(.1,150),breaks=c(.1,1,10,100),labels=c("$0.10","$1","$10","$100"))
g <- g + scale_x_continuous("",limits = c(1968,2009),breaks=seq(1968,2009,5),seq(1968,2009,5),expand=c(0,0))
g <- g + ggtitle("All Stocks, Volatility Quintiles\nCumulative Return of $1 invested in 1968\n")
g <- g + scale_color_manual(values=c("red","blue","red","blue"))
g <- g + scale_linetype_manual(values=c(1,1,3,3))
g <- g + theme(legend.position=c(.2,.8),panel.background=element_blank(),axis.line=element_blank(),panel.grid.minor=element_blank(),panel.grid.major=element_blank(),plot.background=element_rect(fill=NA,colour =NA))
g
#dev.copy(pdf,"matchplotvol.pdf")
dev.copy(png,"matchplotvol.png")
dev.off()

# Matched Beta plot
d <- data.frame(YEAR=1963+(61:552)/12,LINE=1,CUMRET=cum.beta0)
d <- rbind(d,data.frame(YEAR=1963+(61:552)/12,LINE=2,CUMRET=cum.beta1))
d <- rbind(d,data.frame(YEAR=1963+(61:552)/12,LINE=3,CUMRET=beta[beta$BETAQ==1,"CUMRET"]))
d <- rbind(d,data.frame(YEAR=1963+(61:552)/12,LINE=4,CUMRET=beta[beta$BETAQ==5,"CUMRET"]))
d[d$LINE==1,"Beta Quintiles"] <- paste("Matched Quintile 1 ($",format(d[d$LINE==1 & d$YEAR==2009,"CUMRET"],digits=2,nsmall=2),")",sep="")
d[d$LINE==2,"Beta Quintiles"] <- paste("Matched Quintile 5 ($",format(d[d$LINE==2 & d$YEAR==2009,"CUMRET"],digits=2,nsmall=2),")",sep="")
d[d$LINE==3,"Beta Quintiles"] <- paste("Original Quintile 1 ($",format(d[d$LINE==3 & d$YEAR==2009,"CUMRET"],digits=2,nsmall=2),")",sep="")
d[d$LINE==4,"Beta Quintiles"] <- paste("Original Quintile 5 ($",format(d[d$LINE==4 & d$YEAR==2009,"CUMRET"],digits=2,nsmall=2),")",sep="")
g <- ggplot(data=d,aes(x=YEAR,y=CUMRET,colour=`Beta Quintiles`,linetype=`Beta Quintiles`)) + geom_line(lwd=1)
g <- g + scale_y_log10("Cumulative Return",limits = c(.1,150),breaks=c(.1,1,10,100),labels=c("$0.10","$1","$10","$100"))
g <- g + scale_x_continuous("",limits = c(1968,2009),breaks=seq(1968,2009,5),seq(1968,2009,5),expand=c(0,0))
g <- g + ggtitle("All Stocks, Beta Quintiles\nCumulative Return of $1 invested in 1968\n")
g <- g + scale_color_manual(values=c("red","blue","red","blue"))
g <- g + scale_linetype_manual(values=c(1,1,3,3))
g <- g + theme(legend.position=c(.2,.8),panel.background=element_blank(),axis.line=element_blank(),panel.grid.minor=element_blank(),panel.grid.major=element_blank(),plot.background=element_rect(fill=NA,colour =NA))
g
#dev.copy(pdf,"matchplotbeta.pdf")
dev.copy(png,"matchplotbeta.png")
dev.off()


##############################################################
## Balance
##############################################################
# Check pre and post-matching balance
# Perfect balance: L1=0
# Unbalanced: L1=1

data.vol$GROUP <- factor(data.vol$GROUP)
data.prunedv$GROUP <- factor(data.prunedv$GROUP)
data.prunedb$GROUP <- factor(data.prunedb$GROUP)

L1 <- data.frame(period=NA,prevol=NA,postvol=NA,prebeta=NA,postbeta=NA)
N <- data.frame(period=NA,prevol=NA,postvol=NA,prebeta=NA,postbeta=NA)
for(i in 61:552) {
  print(i)
  prevol <- imbalance(group=data.vol[data.vol$PERIOD==i,"TREATV"],data=data.vol[data.vol$PERIOD==i,c("LOGCAP","LOGVOL","GROUP")])$L1$L1
  postvol <- imbalance(group=data.prunedv[data.prunedv$PERIOD==i,"TREATV"],data=data.prunedv[data.prunedv$PERIOD==i,c("LOGCAP","LOGVOL","GROUP")],weights=data.prunedv[data.prunedv$PERIOD==i,"weights"])$L1$L1
  prebeta <- imbalance(group=data.beta[data.beta$PERIOD==i,"TREATB"],data=data.beta[data.beta$PERIOD==i,c("LOGCAP","LOGVOL","GROUP")])$L1$L1
  postbeta <- imbalance(group=data.prunedb[data.prunedb$PERIOD==i,"TREATB"],data=data.prunedb[data.prunedb$PERIOD==i,c("LOGCAP","LOGVOL","GROUP")],weights=data.prunedb[data.prunedb$PERIOD==i,"weights"])$L1$L1
  L1 <- rbind(L1,c(i,prevol,postvol,prebeta,postbeta))
  N <- rbind(N,c(i,nrow(data.vol[data.vol$PERIOD==i,]),nrow(data.prunedv[data.prunedv$PERIOD==i,]),nrow(data.beta[data.beta$PERIOD==i,]),nrow(data.prunedb[data.prunedb$PERIOD==i,])))
}
L1 <- L1[-1,]
N <- N[-1,]
save(L1,N,file="matching_imbalance.RData")

load("matching_imbalance.RData")
apply(L1,2,mean)[-1]
apply(L1,2,sd)[-1]
apply(N,2,mean)[-1]
apply(N,2,sd)[-1]

##############################################################
## Table
##############################################################

# Load data and calculate monthly returns
# Quintile 1: TREAT=0; Quintile 5: TREAT=1
v <- ddply(data.prunedv,.(PERIOD,TREATV),summarise,RETURN=weighted.mean(RETURN,WEIGHT))
b <- ddply(data.prunedb,.(PERIOD,TREATB),summarise,RETURN=weighted.mean(RETURN,WEIGHT))
v[v$TREATV==1,"TREATV"] <- 5
v[v$TREATV==0,"TREATV"] <- 1
colnames(v)[2] <- "VOLATQ"
b[b$TREATB==1,"TREATB"] <- 5
b[b$TREATB==0,"TREATB"] <- 1
colnames(b)[2] <- "BETAQ"

data.rf <- read.delim("famafrench.csv",colClasses=c("numeric","numeric","numeric","numeric","numeric"))
rf <- data.rf[data.rf$DATE>=196801,"RF"]
rmrf <- data.rf[data.rf$DATE>=196801,"RMRF"]

# Calculate cumulative returns
start <- 61
end <- 552
cumret.v <- ddply(v,.(VOLATQ),summarise,CUMRET=prod(RETURN+1))[,2]
cumret.b <- ddply(b,.(BETAQ),summarise,CUMRET=prod(RETURN+1))[,2]

#################
# Volatilty sorts
#################

t.out <- NA
for(i in c(1,5)) t.out <- cbind(t.out,v[v$VOLATQ==i,"RETURN"])
t.out <- t.out[,-1]
# Multiply by 100
t.out <- t.out*100
# Minus risk-free rate or market return
t.rf <- t.out-rf
rm <- rmrf+rf
t.rm <- t.out-rm

# Geometric Mean (rf)
geom <- (apply(100+t.rf,2,geometricmean)-100)*12
# Arithmetic Mean (rf)
meanrf <- apply(t.rf,2,mean)*12
# SD
stdev <- apply(t.out,2,sd)*sqrt(12)
# Sharpe ratio
sharpe <- meanrf/stdev
# Arithmetic Mean (rm)
meanrm <- apply(t.rm,2,mean)*12
# Tracking error
te <- sqrt(apply((t.out-(rm-rf))^2,2,mean))*sqrt(12)
# Information ratio
ir <- meanrm/te
# Beta
pbeta <- apply(t.out,2,function(x){coef(lm(x~rmrf))[2]})
# Alpha
mat.rf <- matrix(rep(rf,2),nrow=492,ncol=2)
mat.rmrf <- t(t(matrix(rep(rmrf,2),nrow=492,ncol=2))*pbeta)
mat.alpha <- t.out-(mat.rf+mat.rmrf)
alpha <- apply(mat.alpha,2,mean)*12

# Assemble table
table.vol <- rbind(cumret.v,geom,meanrf,stdev,sharpe,meanrm,te,ir,pbeta,alpha)
table.vol[c("geom","meanrf","stdev","sharpe","meanrm","te"),] <- table.vol[c("geom","meanrf","stdev","sharpe","meanrm","te"),]/100

#################
# Beta sorts
#################

t.out <- NA
for(i in c(1,5)) t.out <- cbind(t.out,b[b$BETAQ==i,"RETURN"])
t.out <- t.out[,-1]
# Multiply by 100
t.out <- t.out*100
# Minus risk-free rate or market return
t.rf <- t.out-rf
rm <- rmrf+rf
t.rm <- t.out-rm

# Geometric Mean (rf)
geom <- (apply(100+t.rf,2,geometricmean)-100)*12
# Arithmetic Mean (rf)
meanrf <- apply(t.rf,2,mean)*12
# SD
stdev <- apply(t.out,2,sd)*sqrt(12)
# Sharpe ratio
sharpe <- meanrf/stdev
# Arithmetic Mean (rm)
meanrm <- apply(t.rm,2,mean)*12
# Tracking error
te <- sqrt(apply((t.out-(rm-rf))^2,2,mean))*sqrt(12)
# Information ratio
ir <- meanrm/te
# Beta
pbeta <- apply(t.out,2,function(x){coef(lm(x~rmrf))[2]})
# Alpha
mat.rf <- matrix(rep(rf,10),nrow=492,ncol=10)
mat.rmrf <- t(t(matrix(rep(rmrf,10),nrow=492,ncol=10))*pbeta)
mat.alpha <- t.out-(mat.rf+mat.rmrf)
alpha <- apply(mat.alpha,2,mean)*12

# Assemble table
table.beta <- rbind(cumret.b,geom,meanrf,stdev,sharpe,meanrm,te,ir,pbeta,alpha)
table.beta[c("geom","meanrf","stdev","sharpe","meanrm","te"),] <- table.beta[c("geom","meanrf","stdev","sharpe","meanrm","te"),]/100

# Output
table.vol
table.beta

# Periods in which beta 5 quintile > beta 1 quintile
beta1vs5 <- data.frame(period=61:552,beta1=cum.beta0,beta5=cum.beta1,more=cum.beta1>cum.beta0)
beta1vs5[beta1vs5$more,]

# Save to file
save(table.vol,table.beta,beta1vs5,file="data_for_yang.RData")











